pacman::p_load(tidyverse, readr, psych, st, stars, tmap, sf,
ggstatsplot, plotly, ggplot2, ggdist, dplyr, ggiraph)Exploratory Data Analysis - Rainfall
1 Import Packages
2 Load the prepared files
Let’s load the RDS files after data preparation.
rainfall <- readRDS("data/rainfall.rds")
describe(rainfall) vars n mean sd median trimmed mad min max
Station* 1 5547 10.51 5.02 12.0 10.69 5.93 1 18.0
Region* 2 5547 3.51 1.39 4.0 3.63 1.48 1 5.0
Year 3 5547 2006.25 12.34 2010.0 2007.10 13.34 1980 2023.0
Month* 4 5547 6.52 3.45 7.0 6.52 4.45 1 12.0
Date 5 5547 NaN NA NA NaN NA Inf -Inf
TotalRainfall 6 5547 205.65 110.97 190.6 197.50 103.93 0 986.5
TotalRainfall30 7 5547 36.09 62.52 0.0 22.29 0.00 0 320.6
TotalRainfall60 8 5547 45.01 78.85 0.0 27.38 0.00 0 424.0
TotalRainfall120 9 5547 51.38 90.45 0.0 31.05 0.00 0 493.2
range skew kurtosis se
Station* 17.0 -0.24 -1.28 0.07
Region* 4.0 -0.39 -1.25 0.02
Year 43.0 -0.50 -0.99 0.17
Month* 11.0 0.00 -1.22 0.05
Date -Inf NA NA NA
TotalRainfall 986.5 0.85 1.51 1.49
TotalRainfall30 320.6 1.64 1.71 0.84
TotalRainfall60 424.0 1.71 2.10 1.06
TotalRainfall120 493.2 1.75 2.35 1.21
3 Map of Singapore
mpsz <- st_read(dsn = "data/geospatial", layer = "MPSZ-2019") %>%
st_transform(crs=3414)Reading layer `MPSZ-2019' from data source
`C:\Vanessa\SMU\Term 4 - Visual Analytics & Applications\mvheng\Group11_VAP\EDA\data\geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS: WGS 84
glimpse(mpsz)Rows: 332
Columns: 7
$ SUBZONE_N <chr> "MARINA EAST", "INSTITUTION HILL", "ROBERTSON QUAY", "JURON…
$ SUBZONE_C <chr> "MESZ01", "RVSZ05", "SRSZ01", "WISZ01", "MUSZ02", "MPSZ05",…
$ PLN_AREA_N <chr> "MARINA EAST", "RIVER VALLEY", "SINGAPORE RIVER", "WESTERN …
$ PLN_AREA_C <chr> "ME", "RV", "SR", "WI", "MU", "MP", "WI", "WI", "SI", "SI",…
$ REGION_N <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRAL REGION", "WEST…
$ REGION_C <chr> "CR", "CR", "CR", "WR", "CR", "CR", "WR", "WR", "CR", "CR",…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((33222.98 29..., MULTIPOLYGON (…
Let’s take a look at the planning areas for the 5 regions.
tmap_mode("view")
tm_shape(mpsz) +
tm_polygons(col = "REGION_N", palette = "Set2")+
tm_layout(main.title = "Planning Area",
main.title.position = "left",
main.title.size = 1,
legend.show = FALSE,
frame = FALSE) +
tmap_options(check.and.fix = TRUE) +
tm_view(set.zoom.limits = c(11,12))4 Rainfall analysis
4.1 Analyse rainfall using maps
Let’s map the station to the planning area (PA).
Show the code
station_to_PA <- c(
"Admiralty" = "WOODLANDS",
"Ang Mo Kio" = "ANG MO KIO",
"Boon Lay (East)" = "BOON LAY",
"Changi" = "CHANGI",
"Choa Chu Kang (South)" = "CHOA CHU KANG",
"Clementi" = "CLEMENTI",
"East Coast Parkway" = "BEDOK",
"Jurong (West)" = "JURONG WEST",
"Khatib" = "YISHUN",
"Marina Barrage" = "DOWNTOWN CORE",
"Newton" = "NEWTON",
"Pasir Panjang" = "PASIR PANJANG",
"Paya Lebar" = "PAYA LEBAR",
"Seletar" = "SELETAR",
"Sembawang" = "SEMBAWANG",
"Tai Seng" = "HOUGANG",
"Tengah" = "TENGAH",
"Tuas South" = "TUAS"
)
rainfall$PA <- station_to_PA[rainfall$Station]
rainfall <- rainfall[, c("PA", setdiff(names(rainfall), "PA"))]
head(rainfall)# A tibble: 6 × 10
PA Station Region Year Month Date TotalRainfall TotalRainfall30
<chr> <chr> <chr> <dbl> <ord> <date> <dbl> <dbl>
1 WOODLANDS Admiral… North 2009 Jan 2009-01-01 0.8 0
2 WOODLANDS Admiral… North 2009 Feb 2009-02-01 148 0
3 WOODLANDS Admiral… North 2009 Mar 2009-03-01 348 0
4 WOODLANDS Admiral… North 2009 Apr 2009-04-01 149. 0
5 WOODLANDS Admiral… North 2009 May 2009-05-01 206. 0
6 WOODLANDS Admiral… North 2009 Jun 2009-06-01 92 0
# ℹ 2 more variables: TotalRainfall60 <dbl>, TotalRainfall120 <dbl>
rain_map <- rainfall %>%
group_by(PA, Station, Year) %>%
summarise(Annual_Rainfall =
sum(TotalRainfall, na.rm = TRUE)) %>%
ungroup()
glimpse(rain_map)Rows: 469
Columns: 4
$ PA <chr> "ANG MO KIO", "ANG MO KIO", "ANG MO KIO", "ANG MO KIO"…
$ Station <chr> "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio", "Ang Mo Kio"…
$ Year <dbl> 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, …
$ Annual_Rainfall <dbl> 830.6, 2849.2, 3050.4, 2579.6, 3240.0, 1961.4, 2018.4,…
mpsztemp <- left_join(mpsz, rain_map,
by = c("PLN_AREA_N" = "PA"))
glimpse(mpsztemp)Rows: 3,485
Columns: 10
$ SUBZONE_N <chr> "MARINA EAST", "INSTITUTION HILL", "ROBERTSON QUAY", "…
$ SUBZONE_C <chr> "MESZ01", "RVSZ05", "SRSZ01", "WISZ01", "MUSZ02", "MPS…
$ PLN_AREA_N <chr> "MARINA EAST", "RIVER VALLEY", "SINGAPORE RIVER", "WES…
$ PLN_AREA_C <chr> "ME", "RV", "SR", "WI", "MU", "MP", "WI", "WI", "SI", …
$ REGION_N <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRAL REGION", …
$ REGION_C <chr> "CR", "CR", "CR", "WR", "CR", "CR", "WR", "WR", "CR", …
$ Station <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Marina Ba…
$ Year <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2010, 2011…
$ Annual_Rainfall <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1112.0, 23…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((33222.98 29..., MULTIPOLY…
Let’s plot the annual mean temperature distribution across Singapore.
tm_shape(mpsztemp) +
tm_polygons(col = "Annual_Rainfall",
palette = "Blues",
style = "jenks") +
tm_view(set.zoom.limits = c(11,12))